#### 1 LIBRARIES ####
library(tidyverse)
library(rddtools)

#### 2 IMPORT ####
cepf <- read.csv("Data/RD.csv")
cepf$fdn2 <- strptime(cepf$fdn2, format = "%Y-%m-%d")

#### 3 SAMPLE INFO ####
table(cepf$wave)

#Set number of post-plebiscite elections, depending on date of survey
cepf$elections <- 13
cepf$elections[cepf$wave<58] <- 12
cepf$elections[cepf$wave>61] <- 15
mean(cepf$elections)

#Proportion of respondents who report being registered, grouped by plebiscite eligibility
cepf%>%
  group_by(elig)%>%
  summarise(reg.reported=mean(reg,na.rm=T))

#Proportion identifying with a party
mean(cepf$pid,na.rm=T)

#### 4 FIGURE 3 ####
sum(cepf$dist<365*15)

h <- 365*15
ggplot(cepf[cepf$dist<h,],aes(x=ym,y=pid))+
  stat_summary(data=cepf[cepf$dist<h&cepf$elig==0,],
               fun.y="mean", geom="point",size=1,color="gray85")+
  stat_summary(data=cepf[cepf$dist<h&cepf$elig==1,],
               fun.y="mean", geom="point",size=1,color="gray85")+
  stat_summary(data=cepf[cepf$dist<h,],
               aes(x=year),fun.y="mean", geom="point")+
  stat_smooth(data=cepf[cepf$elig==1&cepf$dist<h,],method = "lm",
              col = "navyblue")+
  stat_smooth(data=cepf[cepf$elig==0&cepf$dist<h,],method = "lm",
              col = "firebrick2")+
  xlab("Date of Birth")+
  scale_y_continuous(limits=c(0,1))+
  ylab("Proportion Identifying with a Party")+
  geom_vline(xintercept=1970.75,color="gray70",linetype=2)+
  theme_bw()

ggsave("Figures/Fig3.pdf",width=4,height=4)

#### 5 MANY BANDWIDTHS TEST - PID ####
#### 5.1 CALCULATIONS ####
## no controls
n <- seq(365,365*25,1)
beta <- rep(NA,length(n))
p <- rep(NA,length(n))
band <- data.frame(n,beta,p)

for(i in 1:length(band$n)){
  n <- band$n[i]
  logreg <- lm(pid~elig*diff_in_days,data=cepf[cepf$dist<n,])
  band$p[i] <- summary(logreg)$coefficients[2,4]
  band$beta[i] <- summary(logreg)$coefficients[2,1]
  band$se[i] <- summary(logreg)$coefficients[2,2]
  band$obs[i] <- summary(logreg)$df[1]+summary(logreg)$df[2]
}

bandpid <- band
#write.csv(x=bandpid,file="Data/BW-PID.csv")

## control for survey wave
n <- seq(365,365*25,1)
beta <- rep(NA,length(n))
p <- rep(NA,length(n))
band <- data.frame(n,beta,p)

for(i in 1:length(band$n)){
  n <- band$n[i]
  logreg <- lm(pid~elig*diff_in_days+as.factor(wave),data=cepf[cepf$dist<n,])
  band$p[i] <- summary(logreg)$coefficients[2,4]
  band$beta[i] <- summary(logreg)$coefficients[2,1]
  band$se[i] <- summary(logreg)$coefficients[2,2]
  band$obs[i] <- summary(logreg)$df[1]+summary(logreg)$df[2]
}

bandpidwave <- band
# write.csv(x=bandpidwave,file="Data/BW-PID-sw.csv")

## coalition ID
n <- seq(365,365*25,1)
beta <- rep(NA,length(n))
p <- rep(NA,length(n))
band <- data.frame(n,beta,p)

for(i in 1:length(band$n)){
  n <- band$n[i]
  logreg <- lm(cid~elig*diff_in_days+as.factor(wave),data=cepf[cepf$dist<n,])
  band$p[i] <- summary(logreg)$coefficients[2,4]
  band$beta[i] <- summary(logreg)$coefficients[2,1]
  band$se[i] <- summary(logreg)$coefficients[2,2]
  band$obs[i] <- summary(logreg)$df[1]+summary(logreg)$df[2]
}

bandcid <- band
# write.csv(x=bandcid,file="Data/BW-CID.csv")


#### 5.2 FIGURE 4 ####
bandpid <- read.csv("Data/BW-PID.csv")

ggplot(bandpid,aes(y=beta,x=n/365))+
  geom_errorbar(aes(ymax = (beta)+2*se, ymin = (beta)-2*se,width=0),
                color="gray50",alpha=.01)+
  geom_line()+
  xlab("Bandwidth (years)")+
  scale_x_continuous(breaks=seq(4,25,4),minor_breaks = seq(2,25,2),
                     limits=c(1,25),expand=c(0,0))+
  ylab("Estimated Eligibility Effect (with 95% CI)")+
  geom_hline(yintercept = 0)+
  theme_bw()

ggsave("Figures/Fig4.pdf",height = 4,width = 4)

#### 5.3 FIGURE A.5 ####
bandpidwave <- read.csv("Data/BW-PID-sw.csv")

ggplot(bandpidwave,aes(y=beta,x=n/365))+
  geom_errorbar(aes(ymax = (beta)+2*se, ymin = (beta)-2*se,width=0),
                color="gray50",alpha=.01)+
  geom_line()+
  xlab("Bandwidth (years)")+
  scale_x_continuous(breaks=seq(4,25,4),minor_breaks = seq(2,25,2),
                     limits=c(1,25),expand=c(0,0))+
  ylab("Estimated Eligibility Effect (with 95% CI)")+
  geom_hline(yintercept = 0)+
  theme_bw()

ggsave(filename = "Figures/FigA5.pdf",width = 4,height=4)

#### 5.4 FIGURE A.6 ####
bandcid <- read.csv("Data/BW-CID.csv")

ggplot(bandcid,aes(y=beta,x=n/365))+
  geom_errorbar(aes(ymax = (beta)+2*se, ymin = (beta)-2*se,width=0),
                color="gray50",alpha=.01)+
  geom_line()+
  xlab("Bandwidth (years)")+
  scale_x_continuous(breaks=seq(4,25,4),minor_breaks = seq(2,25,2),
                     limits=c(1,25),expand=c(0,0))+
  ylab("Estimated Eligibility Effect (with 95% CI)")+
  geom_hline(yintercept = 0)+
  theme_bw()

ggsave(filename = "Figures/FigA6.pdf",width = 4,height=4)


#### 6 COMPLIERS ESTIMATE ####

## Limit the sample to surveys conducted after the 2009 election
ceptest <- cepf[cepf$wave>61,]
## Only respondents who were eligible for the 2009 election
ceptest <- ceptest[ceptest$fdn2<"1991-12-13",]

## Define the optimal bandwidth
library(rddtools)
ceptest$did2 <- as.numeric(ceptest$diff_in_days)
rd <- rdd_data(x=ceptest$did2,y=ceptest$pid,cutpoint=0)
rdd_bw_ik(rd)
h <- rdd_bw_ik(rd)
sum(cepf$dist<h)

## Registration effect (COMPLIERS ESTIMATE)
mr <- (lm(reg~elig*diff_in_days,data=cepf[cepf$dist<h,]))
summary(mr)

# compliers:
# .17

## self-reported registration rate
mean(ceptest$reg,na.rm=T)
## self-reported registration rate among pleb-eligible
sr1 <- mean(ceptest$reg[ceptest$elig==1],na.rm=T)
## actual registration rate for plebiscite
ar1 <- .92
## proportion of sample that was eligible for plebiscite
pi <-  mean(ceptest$elig,na.rm=T)
## self-reported registraton among pleb-ineligible
sr0 <- mean(ceptest$reg[ceptest$elig==0],na.rm=T)
## estimate of actual registration among pleb-ineligible (overall registration was 68%, survey was representative)
ar0 <- (.68-(pi*.92))/(1-pi)
## difference between actual and self-reported reg rates
flat.adj <- sr0-ar0
flat.adj1 <- sr1-ar1

## create placeholder variable for proportional adjustment
ceptest$regx <- NA

## we use the over-reporting rate to determine the proportion of those who claim to be registered but are actually un-registered
## we then randomly assign that proportion to un-registered status
## pleb-eligible, self-reported reg=1:
set.seed(135189)
ceptest$regx[ceptest$elig==1&ceptest$reg==1] <- 
  rbinom(sum(ceptest$elig==1&ceptest$reg==1,na.rm=T),1,
         (sr1-ar1)/sr1)
## pleb-ineligible, self-reported reg=0:
ceptest$regx[ceptest$elig==0&ceptest$reg==1] <- 
  rbinom(sum(ceptest$elig==0&ceptest$reg==1,na.rm=T),1,
         (sr0-ar0)/sr0)

## we assume that the social desirability bias only runs in one direction
ceptest$regx[ceptest$reg==0] <- 0
## reg2 is our new estimate of registration, using the proportional adjustment
ceptest$reg2 <- ceptest$reg-ceptest$regx

## we also apply a flat adjustment (simply shift the estimate for any given age group equal to the total over-reprorting rate within the treatment (pleb-eligibility) group)
ceptest$reg3 <- ceptest$reg
ceptest$reg3[ceptest$elig==1] <- ceptest$reg[ceptest$elig==1]-flat.adj1
ceptest$reg3[ceptest$elig==0] <- ceptest$reg[ceptest$elig==0]-flat.adj

#### 6.1 FIGURE A.4 ####
h <- 365*15

ggplot(ceptest[ceptest$dist<h,],aes(x=ym,y=reg3))+
  stat_summary(data=ceptest[ceptest$dist<h&ceptest$elig==1,],
               aes(x=year),fun.y="mean", geom="point", col = "black")+
  stat_summary(data=ceptest[ceptest$dist<h&ceptest$elig==0,],
               aes(x=year),fun.y="mean", geom="point", col = "black")+
  stat_smooth(data=ceptest[ceptest$elig==1&ceptest$dist<h,],method = "lm",
              col = "red")+
  stat_smooth(data=ceptest[ceptest$elig==0&ceptest$dist<h,],method = "lm",
              col = "red")+
  geom_hline(yintercept=0,size=.1)+
  geom_hline(yintercept=1,size=.1)+
  scale_y_continuous(expand=c(0,0))+
  xlab("Date of Birth")+
  ylab("Registration Rate")+
  ggtitle("Adjusted for Over-Reporting",subtitle = "Flat Adjustment")+
  theme_bw()

ggsave(filename = "Figures/FigA4-3.pdf",width = 3,height=3.5)

ggplot(ceptest[ceptest$dist<h,],aes(x=ym,y=reg2))+
  stat_summary(data=ceptest[ceptest$dist<h&ceptest$elig==1,],
               aes(x=year),fun.y="mean", geom="point", col = "black")+
  stat_summary(data=ceptest[ceptest$dist<h&ceptest$elig==0,],
               aes(x=year),fun.y="mean", geom="point", col = "black")+
  stat_smooth(data=ceptest[ceptest$elig==1&ceptest$dist<h,],method = "lm",
              col = "red")+
  stat_smooth(data=ceptest[ceptest$elig==0&ceptest$dist<h,],method = "lm",
              col = "red")+
  geom_hline(yintercept=0,size=.1)+
  geom_hline(yintercept=1,size=.1)+
  scale_y_continuous(expand=c(0,0))+
  xlab("Date of Birth")+
  ylab("Registration Rate")+
  ggtitle("Adjusted for Over-Reporting",subtitle = "Proportional Adjustment")+
  theme_bw()

ggsave(filename = "Figures/FigA4-2.pdf",width = 3,height=3.5)

ggplot(ceptest[ceptest$dist<h,],aes(x=ym,y=reg3))+
  stat_summary(data=ceptest[ceptest$dist<h&ceptest$elig==1,],
               aes(x=year),fun.y="mean", geom="point", col = "black")+
  stat_summary(data=ceptest[ceptest$dist<h&ceptest$elig==0,],
               aes(x=year),fun.y="mean", geom="point", col = "black")+
  stat_smooth(data=ceptest[ceptest$elig==1&ceptest$dist<h,],method = "lm",
              col = "red")+
  stat_smooth(data=ceptest[ceptest$elig==0&ceptest$dist<h,],method = "lm",
              col = "red")+
  geom_hline(yintercept=0,size=.1)+
  geom_hline(yintercept=1,size=.1)+
  scale_y_continuous(expand=c(0,0))+
  xlab("Date of Birth")+
  ylab("Registration Rate")+
  ggtitle("Self-Reported Registration",subtitle = "Raw Values")+
  theme_bw()

ggsave(filename = "Figures/FigA4-1.pdf",width = 3,height=3.5)


#### 6.2 ESTIMATES ####
## find the IK optimal bandwidth
ceptest$did2 <- as.numeric(ceptest$diff_in_days)
rd <- rdd_data(x=ceptest$did2,y=ceptest$reg,cutpoint=0)
rdd_bw_ik(rd)
h <- rdd_bw_ik(rd)

## raw
summary(lm(reg~elig*diff_in_days,data=ceptest[ceptest$dist<h,]))$coefficients[2,1]
## proportional adjustment
summary(lm(reg2~elig*diff_in_days,data=ceptest[ceptest$dist<h,]))$coefficients[2,1]
## flat adjustment
summary(lm(reg3~elig*diff_in_days,data=ceptest[ceptest$dist<h,]))$coefficients[2,1]


raw <- summary(lm(reg~elig*diff_in_days,data=ceptest[ceptest$dist<h,]))$coefficients[2,1]
padj <- summary(lm(reg2~elig*diff_in_days,data=ceptest[ceptest$dist<h,]))$coefficients[2,1]
fadj <- summary(lm(reg3~elig*diff_in_days,data=ceptest[ceptest$dist<h,]))$coefficients[2,1]


## changes:
(padj-raw)/raw
(fadj-raw)/raw



#### 7 RD: IK OPTIMAL BANDWIDTH ####

## Define the optimal bandwidth
library(rddtools)
cepf$did2 <- as.numeric(cepf$diff_in_days)
rd <- rdd_data(x=cepf$did2,y=cepf$pid,cutpoint=0)
rdd_bw_ik(rd)
h <- rdd_bw_ik(rd)
ikh <- rdd_bw_ik(rd)
sum(cepf$dist<h)

## Regression at this bandwidth
m2 <- (lm(pid~elig*diff_in_days,data=cepf[cepf$dist<h,]))
summary(m2)

## Registration effect (COMPLIERS ESTIMATE)
mr <- (lm(reg~elig*diff_in_days,data=cepf[cepf$dist<h,]))
summary(mr)
summary(mr)$coefficients[2,1]

#### 7.2 FIG A.3 ####
library(rdd)
DCdensity(cepf$diff_in_days,htest=T,verbose=T)

DCdensity(cepf$diff_in_days,htest=T,verbose=T)

# export manually according to specs below:
#ggsave("Figures/FigA3.pdf",width=4.5,height=4)


#### 7.3 LATE INTERPRETATION ####
ATE <- function(ITT,C,E){
  1-(1-ITT/C)^(1/E)
}

compliers <- summary(mr)$coefficients[2,1]
elections <- mean(cepf$elections[cepf$dist<h],na.rm=T)

ATE(.041,compliers,elections)
ATE(.041,compliers*fadj/raw,elections)
ATE(.041,compliers*padj/raw,elections)


effect <- .02
neteffect <- 1-(1-effect)^10
itt <- neteffect*compliers
summary(lm(pid ~ elig*diff_in_days, offset= itt*elig, 
           data=cepf[cepf$dist<h,]))$coefficients[2,4]/2


#### 8 DIFFERENCE IN MEANS (FIG A.7) ####

n <- seq(4,365*8,1)
dim <- data.frame(n)
dim$pid <- NA
dim$pid.p <- NA
dim$obs <- NA

for(i in 1:length(n)){
  temp <- cepf[cepf$dist<=n[i],]
  dim$obs[i] <- length(temp$pid)
  dim$pid[i] <- mean(temp$pid[temp$elig==1],na.rm=T)-mean(temp$pid[temp$elig==0],na.rm=T)
  dim$pid.p[i] <- t.test(temp$pid[temp$elig==1],temp$pid[temp$elig==0])$p.value
}

sum(dim$pid.p<0.05,na.rm=T)

ggplot(dim,aes(x=n/365,y=pid))+
  geom_line(color="firebrick")+
  geom_hline(yintercept = 0)+
  xlab("Bandwidth (Years)")+
  ylab("Difference in Means")+
  theme_bw()

ggsave("Figures/FigA7.pdf",width = 4,height = 4)



#### 9 POWER ANALYSIS ####

#### 9.1 FIGURE A.8 ####
pwr1 <- read.csv("Data/rd-pwr.csv")
pwr1$power <- NA

set.seed(190239)

sims <- 2000
for(j in 1:length(pwr1$Bandwidth)){
  h <- pwr1$Bandwidth[j]
  effect <- pwr1$Effect[j]
  #trim sample to bandwidth
  cepfs <- cepf[cepf$diff_in_days<h,]
  #establish number of partisans in sample
  totalprob <- mean(cepfs$pid)
  prob0 <- totalprob-mean(cepfs$elig)*effect
  prob1 <- prob0+effect
  
  
  p1 <- rep(NA,sims)
  beta1 <- rep(NA,sims)
  
  for(i in 1:sims){
    cepfs$d1 <- rbinom(n=length(cepfs$pid),size = 1,prob=prob1)
    cepfs$d0 <- rbinom(n=length(cepfs$pid),size = 1,prob=prob0)
    cepfs$d <- cepfs$d0*(1-cepfs$elig)+cepfs$d1*cepfs$elig
    
    m1 <- lm(d~elig*diff_in_days,data=cepfs)
    p1[i] <- summary(m1)$coefficients[2,4]
    beta1[i] <- summary(m1)$coefficients[2,1]
  }
  
  pwr1$power[j] <- mean(p1<0.1&beta1>0)
  print(paste("progress: ",round(100*j/length(pwr1$Bandwidth)),"%",sep=""))
}


ggplot(pwr1,aes(x=Years,y=Effect))+
  stat_smooth(method="loess",se = F,color="black")+
  scale_y_continuous(limits=c(.03,.145),breaks=seq(0,.2,.04),
                     minor_breaks = seq(0,.2,.02))+
  scale_x_continuous(breaks=seq(0,25,4))+
  ylab("Effect Size")+
  xlab("Bandwidth (Years)")+
  theme_bw()

ggsave("Figures/FigA8.pdf",width=6,height=3)

#### 9.4 FIGURE A.9 ####
pwr2 <- read.csv("Data/dim-pwr1.csv")

set.seed(190239)

sims <- 2000
for(j in 1:length(pwr2$Bandwidth)){
  h <- pwr2$Bandwidth[j]
  effect <- pwr2$Effect[j]
  #trim sample to bandwidth
  cepfs <- cepf[cepf$diff_in_days<h,]
  #establish number of partisans in sample
  totalprob <- mean(cepfs$pid)
  prob0 <- totalprob-mean(cepfs$elig)*effect
  prob1 <- prob0+effect
  
  p1 <- rep(NA,sims)
  beta1 <- rep(NA,sims)
  
  for(i in 1:sims){
    cepfs$d1 <- rbinom(n=length(cepfs$pid),size = 1,prob=prob1)
    cepfs$d0 <- rbinom(n=length(cepfs$pid),size = 1,prob=prob0)
    cepfs$d <- cepfs$d0*(1-cepfs$elig)+cepfs$d1*cepfs$elig
    
    p1[i] <- t.test(cepfs$d[cepfs$elig==1],cepfs$d[cepfs$elig==0])$p.value
    beta1[i] <- mean(cepfs$d[cepfs$elig==1],na.rm=T)-mean(cepfs$d[cepfs$elig==0],na.rm=T)
  }
  
  pwr2$power[j] <- mean(p1<0.1&beta1>0)
  print(paste("progress: ",round(100*j/length(pwr2$Bandwidth)),"%",sep=""))
}

pwr2$Years <- pwr2$Bandwidth/365

ggplot(pwr2,aes(x=Years,y=Effect))+
  stat_smooth(method="loess",se = F,color="black")+
  scale_y_continuous(limits=c(.02,.2),breaks=seq(0,.24,.04),
                     minor_breaks = seq(0,.24,.02))+
  scale_x_continuous(breaks=seq(0,8,1),limits=c(0,8.3))+
  ylab("Effect Size")+
  xlab("Bandwidth (Years)")+
  theme_bw()

ggsave("Figures/FigA9.pdf",width=6,height=3)


#### 10 REGISTRATION ####

mean(cepf$reg[cepf$elig==1])
mean(cepf$reg[cepf$elig==0])

#### 10.1 MANY BANDWIDTHS TEST - REGISTRATION ####

n <- seq(365,365*10,1)
beta <- rep(NA,length(n))
p <- rep(NA,length(n))
band <- data.frame(n,beta,p)

for(i in 1:length(band$n)){
  n <- band$n[i]
  logreg <- lm(reg~elig*diff_in_days,data=cepf[cepf$dist<n,])
  band$p[i] <- summary(logreg)$coefficients[2,4]
  band$beta[i] <- summary(logreg)$coefficients[2,1]
  band$se[i] <- summary(logreg)$coefficients[2,2]
  band$obs[i] <- summary(logreg)$df[1]+summary(logreg)$df[2]
}

bandreg <- band

ggplot(bandreg,aes(y=beta,x=n/365))+
  geom_errorbar(aes(ymax = (beta)+2*se, ymin = (beta)-2*se,width=0),
                color="gray50",alpha=.05)+
  geom_line()+
  xlab("Bandwidth (years)")+
  scale_x_continuous(breaks=seq(4,20,4),minor_breaks = seq(2,20,2))+
  ylab("Estimated Eligibility Effect (with 95% CI)")+
  geom_hline(yintercept = 0)+
  coord_flip()+
  theme_bw()

## minimum sample size for significant effect:
min(bandreg$obs[bandreg$obs>max(bandreg$obs[bandreg$p>0.05])])






